package com.galeno.算子

import com.galeno.utils.SparkUtil
import org.apache.spark.rdd.RDD

/**
 * @Title: AggregatorDemo.scala
 * @Description: Demo of Spark's aggregateByKey operator.
 * @author galeno
 * @date 2021/9/3 19:12
 */
object AggregatorDemo {

  /**
   * Demonstrates `aggregateByKey`: the two combiner functions need not be
   * the same, and the zero value (0 here) is used only during the local
   * (per-partition) aggregation — max within a partition, sum across them.
   */
  def main(args: Array[String]): Unit = {
    val sc = SparkUtil.getSc

    // Sample (key, value) pairs distributed over 2 partitions.
    val pairs = List(
      ("cat", 2), ("cat", 5), ("mouse", 4),
      ("cat", 12), ("dog", 12), ("mouse", 2)
    )
    val sourceRdd = sc.parallelize(pairs, 2)

    // Per partition: keep the max value per key; across partitions: sum the maxima.
    val aggregated: RDD[(String, Int)] =
      sourceRdd.aggregateByKey(0)((acc, v) => Math.max(acc, v), (a, b) => a + b)

    println(aggregated.collect().toList)

    // Keep the JVM alive so the Spark web UI remains available for inspection.
    Thread.sleep(Integer.MAX_VALUE)
  }

}
